## Join all datasets

## Output file: combined.csv
## Unit of observation (row): member within a vote
## Written to the working directory

## Input files:
##   votes.csv
##   behaviour.csv
##   names.csv
##   clubs.csv
##   committees.csv
## Files must be in the working directory


# define helper function ----

# Number the occurrences of each distinct value in `vector`.
#
# The k-th time a value appears it receives sub-index k. Occurrences are
# counted per distinct value rather than per run of adjacent equal values, so
# the result is also correct when equal values are not stored next to each
# other. For input in which equal values are contiguous the result is the
# same as counting within runs.
#
# Args:
#   vector: atomic vector of group keys (e.g. member ids). NA is treated as
#     one ordinary value.
#
# Returns:
#   Integer vector with the same length as `vector`; integer(0) when `vector`
#   is empty.
make_subindex <- function(vector) {
  # map every element to the position of its value among the distinct values;
  # match() pairs NA with NA, so missing keys form a single group
  group <- match(vector, unique(vector))

  # within each group, replace the element positions by 1, 2, ..., group size
  ave(seq_along(group), group, FUN = seq_along)
}


# read all datasets ----

# each input file is loaded from the working directory into a data frame
# named after the file
votes      <- read.csv(file = "votes.csv")
behaviour  <- read.csv(file = "behaviour.csv")
names      <- read.csv(file = "names.csv")
clubs      <- read.csv(file = "clubs.csv")
committees <- read.csv(file = "committees.csv")


# reshape clubs and committees to wide ----

# Both tables get the same treatment: an `index` column numbers the rows of
# each member, then reshape() spreads the remaining columns into one row per
# `id_member`, with the index appended to the column names after "_".

clubs <- reshape(
  transform(clubs, index = make_subindex(id_member)),
  idvar = "id_member",
  timevar = "index",
  direction = "wide",
  sep = "_"
)

committees <- reshape(
  transform(committees, index = make_subindex(id_member)),
  idvar = "id_member",
  timevar = "index",
  direction = "wide",
  sep = "_"
)


# join all datasets ----

# full outer joins (all = TRUE): rows without a match in the other table are
# kept and padded with NA

# member-level tables are attached to behaviour one after another ...
combined <- merge(behaviour, names, by = "id_member", all = TRUE)
combined <- merge(combined, clubs, by = "id_member", all = TRUE)
combined <- merge(combined, committees, by = "id_member", all = TRUE)

# ... and the vote-level table is attached last
combined <- merge(combined, votes, by = "id_vote", all = TRUE)


# sort by vote and member ----

# rows are ordered by vote first, ties broken by member
combined <- combined[
  order(combined[["id_vote"]], combined[["id_member"]]),
]


# write dataset ----

# the result goes to the working directory; row names are not written
combined |>
  write.csv(file = "combined.csv", row.names = FALSE)